##Setting up working directory
# The peak CSVs read below use paths relative to this directory, so the
# script only runs on a machine where ~/motif_analysis exists.
setwd("~/motif_analysis")

##Loading Packages 
library(GenomicRanges)
library(SummarizedExperiment)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Mmusculus.UCSC.mm10)
library(monaLisa)
library(ComplexHeatmap)
library(circlize)

##Read peak tables (all paths are relative to the working directory)
# MYT1L peak sets; going by the file names these are the promoter/TSS,
# active-enhancer and poised-enhancer subsets.
pfc_myt1l_promoter <- read.csv(
  "peaks/pfc_tss_peaks.csv",
  header = TRUE
)
pfc_myt1l_active_enhancer <- read.csv(
  "peaks/histone/pfc/pfc_cr_myt1l_active_enhancer.csv",
  header = TRUE
)
pfc_myt1l_poised_enhancer <- read.csv(
  "peaks/histone/pfc/pfc_cr_myt1l_poised_enhancer.csv",
  header = TRUE
)

# Reference peak sets: ATAC TSS peaks plus all active / poised enhancers.
pfc_atac_tss <- read.csv(
  "peaks/atac_pfc_tss_peaks.csv",
  header = TRUE
)
pfc_active_enhancer <- read.csv(
  "peaks/histone/pfc/pfc_active_enhancer.csv",
  header = TRUE
)
pfc_poised_enhancer <- read.csv(
  "peaks/histone/pfc/pfc_poised_enhancer.csv",
  header = TRUE
)

##Establishing GRange files
# Convert a peak table (as returned by read.csv) into a GRanges object.
#
# peaks: data.frame with `Chr`, `Start` and `End` columns; its first column
#        holds the peak / gene identifier.
# Returns a GRanges with one range per row, every range on the "+" strand,
# and the identifier stored in the `peakid` metadata column.
#
# The identifier is taken by position rather than by name. The CSVs start with
# a UTF-8 byte-order mark, which read.csv() folds into the first header field;
# the resulting name ("ï..PeakID" / "ï..genes") depends on platform and locale.
# Looking it up by that name yields NULL elsewhere and `peakid` is silently
# lost. A BOM can only prefix the first field, so `[[1]]` is the same column.
peak_table_to_granges <- function(peaks) {
  GRanges(
    seqnames = Rle(peaks$Chr),
    ranges = IRanges(peaks$Start, peaks$End),
    # One "+" per row, so the strand vector always matches the table length.
    strand = Rle(rep("+", nrow(peaks))),
    peakid = peaks[[1]]
  )
}

# MYT1L peak sets
pfc_myt1l_promoter_range <- peak_table_to_granges(pfc_myt1l_promoter)
pfc_myt1l_active_enhancer_range <- peak_table_to_granges(pfc_myt1l_active_enhancer)
pfc_myt1l_poised_enhancer_range <- peak_table_to_granges(pfc_myt1l_poised_enhancer)

# Reference peak sets (for the ATAC table the first column is `genes`)
pfc_atac_tss_range <- peak_table_to_granges(pfc_atac_tss)
pfc_active_enhancer_range <- peak_table_to_granges(pfc_active_enhancer)
pfc_poised_enhancer_range <- peak_table_to_granges(pfc_poised_enhancer)
                                          
##Motif analysis for promoter
# Enrichment is computed for the sequences under `pfc_myt1l_promoter_range`
# against background sequences sampled from `pfc_atac_tss_range`.

# `pfc_dge` is not created anywhere above this section. Fail before the
# expensive enrichment steps instead of after them.
if (!exists("pfc_dge")) {
  stop("`pfc_dge` (needs an `external_gene_name` column) must be loaded ",
       "before running the promoter motif analysis", call. = FALSE)
}
# `pwms` is not created anywhere above this section either. Fall back to the
# JASPAR2020 vertebrate PWM set only when it is absent.
# NOTE(review): the fallback collection is an assumption - confirm it is the
# motif set this analysis is meant to use.
if (!exists("pwms")) {
  message("`pwms` not found; loading JASPAR2020 vertebrate PWMs")
  pwms <- getMatrixSet(JASPAR2020,
                       opts = list(matrixtype = "PWM",
                                   tax_group = "vertebrates"))
}
# background = "genome" draws random background regions. Seeding the
# BiocParallel backend keeps the draw, and therefore the set of motifs passing
# the cutoffs, identical between runs.
promoter_rng_seed <- 42L

#de novo
pfc_myt1l_promoter_seqs <- getSeq(BSgenome.Mmusculus.UCSC.mm10,
                                  pfc_myt1l_promoter_range)
se_km_pfc_promoter <- calcBinnedKmerEnr(
  seqs = pfc_myt1l_promoter_seqs,
  kmerLen = 5,
  BPPARAM = BiocParallel::SnowParam(1, RNGseed = promoter_rng_seed),
  background = "genome",
  genome = BSgenome.Mmusculus.UCSC.mm10,
  genome.regions = pfc_atac_tss_range,
  includeRevComp = TRUE,
  genome.oversample = 2
)

# Keep k-mers whose largest |negLog10Padj| across columns exceeds 20.
# NOTE(review): the active-enhancer section uses 10 for the same k-mer filter;
# confirm the difference is intended.
pfc_myt1l_promoter_selkm <- apply(
  assay(se_km_pfc_promoter, "negLog10Padj"), 1,
  function(x) max(abs(x), 0, na.rm = TRUE)
) > 20
pfc_myt1l_promoter_sekmSel <- se_km_pfc_promoter[pfc_myt1l_promoter_selkm, ]

#known motif
se_pfc_myt1l_promoter_selkm <- calcBinnedMotifEnrR(
  seqs = pfc_myt1l_promoter_seqs,
  pwmL = pwms,
  BPPARAM = BiocParallel::SnowParam(1, RNGseed = promoter_rng_seed),
  background = "genome",
  genome = BSgenome.Mmusculus.UCSC.mm10,
  genome.regions = pfc_atac_tss_range,
  genome.oversample = 2
)
# Keep motifs whose largest |negLog10Padj| across columns exceeds 10.
sel4_pfc_myt1l_promoter <- apply(
  assay(se_pfc_myt1l_promoter_selkm, "negLog10Padj"), 1,
  function(x) max(abs(x), 0, na.rm = TRUE)
) > 10

seSel4_pfc_myt1l_promoter <- se_pfc_myt1l_promoter_selkm[sel4_pfc_myt1l_promoter, ]

#filtering for expressed TFs only
# Case-insensitive match of motif names against the gene names in `pfc_dge`.
i_express <- toupper(rowData(seSel4_pfc_myt1l_promoter)$motif.name) %in%
  toupper(pfc_dge$external_gene_name)
seSel4_pfc_myt1l_promoter_expressed <- seSel4_pfc_myt1l_promoter[i_express, ]

#clustering
# Average-linkage clustering on (1 - motif similarity) as the distance.
SimMatSel4_pfc_myt1l_promoter_expressed <- motifSimilarity(
  rowData(seSel4_pfc_myt1l_promoter_expressed)$motif.pfm,
  BPPARAM = BiocParallel::SnowParam(1)
)
hcl4_pfc_myt1l_promoter <- hclust(
  as.dist(1 - SimMatSel4_pfc_myt1l_promoter_expressed),
  method = "average"
)

##Motif analysis for active enhancer
# Enrichment is computed for the sequences under
# `pfc_myt1l_active_enhancer_range` against background sequences sampled from
# `pfc_active_enhancer_range`.

# `pfc_dge` is not created anywhere above this section. Fail before the
# expensive enrichment steps instead of after them.
if (!exists("pfc_dge")) {
  stop("`pfc_dge` (needs an `external_gene_name` column) must be loaded ",
       "before running the active enhancer motif analysis", call. = FALSE)
}
# `pwms` is not created anywhere above this section either. Fall back to the
# JASPAR2020 vertebrate PWM set only when it is absent.
# NOTE(review): the fallback collection is an assumption - confirm it is the
# motif set this analysis is meant to use.
if (!exists("pwms")) {
  message("`pwms` not found; loading JASPAR2020 vertebrate PWMs")
  pwms <- getMatrixSet(JASPAR2020,
                       opts = list(matrixtype = "PWM",
                                   tax_group = "vertebrates"))
}
# background = "genome" draws random background regions. Seeding the
# BiocParallel backend keeps the draw, and therefore the set of motifs passing
# the cutoffs, identical between runs.
active_enhancer_rng_seed <- 42L

# de novo
pfc_myt1l_active_enhancer_seqs <- getSeq(BSgenome.Mmusculus.UCSC.mm10,
                                         pfc_myt1l_active_enhancer_range)
se_km_pfc_active_enhancer <- calcBinnedKmerEnr(
  seqs = pfc_myt1l_active_enhancer_seqs,
  kmerLen = 5,
  BPPARAM = BiocParallel::SnowParam(1, RNGseed = active_enhancer_rng_seed),
  background = "genome",
  genome = BSgenome.Mmusculus.UCSC.mm10,
  genome.regions = pfc_active_enhancer_range,
  includeRevComp = TRUE,
  genome.oversample = 2
)

# Keep k-mers whose largest |negLog10Padj| across columns exceeds 10.
pfc_myt1l_active_enhancer_selkm <- apply(
  assay(se_km_pfc_active_enhancer, "negLog10Padj"), 1,
  function(x) max(abs(x), 0, na.rm = TRUE)
) > 10
pfc_myt1l_active_enhancer_sekmSel <- se_km_pfc_active_enhancer[pfc_myt1l_active_enhancer_selkm, ]

#known motif
se_pfc_myt1l_active_enhancer_selkm <- calcBinnedMotifEnrR(
  seqs = pfc_myt1l_active_enhancer_seqs,
  pwmL = pwms,
  BPPARAM = BiocParallel::SnowParam(1, RNGseed = active_enhancer_rng_seed),
  background = "genome",
  genome = BSgenome.Mmusculus.UCSC.mm10,
  genome.regions = pfc_active_enhancer_range,
  genome.oversample = 2
)
# Keep motifs whose largest |negLog10Padj| across columns exceeds 10.
sel4_pfc_myt1l_active_enhancer <- apply(
  assay(se_pfc_myt1l_active_enhancer_selkm, "negLog10Padj"), 1,
  function(x) max(abs(x), 0, na.rm = TRUE)
) > 10
seSel4_pfc_myt1l_active_enhancer <- se_pfc_myt1l_active_enhancer_selkm[sel4_pfc_myt1l_active_enhancer, ]

#filtering for expressed TFs only
# Case-insensitive match of motif names against the gene names in `pfc_dge`.
i_express <- toupper(rowData(seSel4_pfc_myt1l_active_enhancer)$motif.name) %in%
  toupper(pfc_dge$external_gene_name)
seSel4_pfc_myt1l_active_enhancer_expressed <- seSel4_pfc_myt1l_active_enhancer[i_express, ]

#clustering
# Average-linkage clustering on (1 - motif similarity) as the distance.
SimMatSel4_pfc_myt1l_active_enhancer_expressed <- motifSimilarity(
  rowData(seSel4_pfc_myt1l_active_enhancer_expressed)$motif.pfm,
  BPPARAM = BiocParallel::SnowParam(1)
)
hcl4_pfc_myt1l_active_enhancer <- hclust(
  as.dist(1 - SimMatSel4_pfc_myt1l_active_enhancer_expressed),
  method = "average"
)
